{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import gzip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('hosp/diagnoses_icd.csv.gz')\n",
    "pt_icu = pd.read_csv('icu/icustays.csv.gz')\n",
    "df_lab = pd.read_csv('hosp/labevents.csv.gz')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HF ICD without BNP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "hfref_id = df[df['icd_code'].str.startswith('4282')|df['icd_code'].str.startswith('I502')].subject_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "hfpef_id = df[df['icd_code'].str.startswith('4283')|df['icd_code'].str.startswith('I503')].subject_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "hf_id = pd.concat([hfref_id, hfpef_id])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "252        10000980\n",
       "308        10000980\n",
       "330        10000980\n",
       "355        10000980\n",
       "1180       10002155\n",
       "             ...   \n",
       "4755413    19998330\n",
       "4755429    19998330\n",
       "4755446    19998330\n",
       "4755463    19998330\n",
       "4755736    19998591\n",
       "Name: subject_id, Length: 49725, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hf_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49725"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(hf_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50920"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "icu_id = pt_icu.subject_id\n",
    "len(pt_icu.subject_id.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>stay_id</th>\n",
       "      <th>first_careunit</th>\n",
       "      <th>last_careunit</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10000980</td>\n",
       "      <td>26913865</td>\n",
       "      <td>39765666</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2189-06-27 08:42:00</td>\n",
       "      <td>2189-06-27 20:38:27</td>\n",
       "      <td>0.497535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>10002013</td>\n",
       "      <td>23581541</td>\n",
       "      <td>39060235</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>Cardiac Vascular Intensive Care Unit (CVICU)</td>\n",
       "      <td>2160-05-18 10:00:53</td>\n",
       "      <td>2160-05-19 17:33:33</td>\n",
       "      <td>1.314352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10002155</td>\n",
       "      <td>20345487</td>\n",
       "      <td>32358465</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2131-03-09 21:33:00</td>\n",
       "      <td>2131-03-10 18:09:21</td>\n",
       "      <td>0.858576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10002155</td>\n",
       "      <td>23822395</td>\n",
       "      <td>33685454</td>\n",
       "      <td>Coronary Care Unit (CCU)</td>\n",
       "      <td>Coronary Care Unit (CCU)</td>\n",
       "      <td>2129-08-04 12:45:00</td>\n",
       "      <td>2129-08-10 17:02:38</td>\n",
       "      <td>6.178912</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10002155</td>\n",
       "      <td>28994087</td>\n",
       "      <td>31090461</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>2130-09-24 00:50:00</td>\n",
       "      <td>2130-09-27 22:13:41</td>\n",
       "      <td>3.891447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73163</th>\n",
       "      <td>19998330</td>\n",
       "      <td>24096838</td>\n",
       "      <td>31199714</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2178-11-29 21:51:19</td>\n",
       "      <td>2178-11-29 22:44:27</td>\n",
       "      <td>0.036898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73164</th>\n",
       "      <td>19998330</td>\n",
       "      <td>24096838</td>\n",
       "      <td>33428243</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2178-11-27 22:53:00</td>\n",
       "      <td>2178-11-29 21:29:39</td>\n",
       "      <td>1.942118</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73165</th>\n",
       "      <td>19998330</td>\n",
       "      <td>24492004</td>\n",
       "      <td>32641669</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
       "      <td>2178-10-01 08:51:00</td>\n",
       "      <td>2178-10-03 23:25:08</td>\n",
       "      <td>2.607037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73166</th>\n",
       "      <td>19998591</td>\n",
       "      <td>24349193</td>\n",
       "      <td>31144045</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2185-07-16 18:48:18</td>\n",
       "      <td>2185-07-26 18:27:01</td>\n",
       "      <td>9.985220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73167</th>\n",
       "      <td>19998591</td>\n",
       "      <td>24349193</td>\n",
       "      <td>36794489</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>Medical Intensive Care Unit (MICU)</td>\n",
       "      <td>2185-07-03 22:45:00</td>\n",
       "      <td>2185-07-12 13:40:34</td>\n",
       "      <td>8.621921</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>22903 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       subject_id   hadm_id   stay_id  \\\n",
       "1        10000980  26913865  39765666   \n",
       "6        10002013  23581541  39060235   \n",
       "7        10002155  20345487  32358465   \n",
       "8        10002155  23822395  33685454   \n",
       "9        10002155  28994087  31090461   \n",
       "...           ...       ...       ...   \n",
       "73163    19998330  24096838  31199714   \n",
       "73164    19998330  24096838  33428243   \n",
       "73165    19998330  24492004  32641669   \n",
       "73166    19998591  24349193  31144045   \n",
       "73167    19998591  24349193  36794489   \n",
       "\n",
       "                                         first_careunit  \\\n",
       "1                    Medical Intensive Care Unit (MICU)   \n",
       "6          Cardiac Vascular Intensive Care Unit (CVICU)   \n",
       "7                    Medical Intensive Care Unit (MICU)   \n",
       "8                              Coronary Care Unit (CCU)   \n",
       "9      Medical/Surgical Intensive Care Unit (MICU/SICU)   \n",
       "...                                                 ...   \n",
       "73163                Medical Intensive Care Unit (MICU)   \n",
       "73164                Medical Intensive Care Unit (MICU)   \n",
       "73165  Medical/Surgical Intensive Care Unit (MICU/SICU)   \n",
       "73166                Medical Intensive Care Unit (MICU)   \n",
       "73167                Medical Intensive Care Unit (MICU)   \n",
       "\n",
       "                                          last_careunit               intime  \\\n",
       "1                    Medical Intensive Care Unit (MICU)  2189-06-27 08:42:00   \n",
       "6          Cardiac Vascular Intensive Care Unit (CVICU)  2160-05-18 10:00:53   \n",
       "7                    Medical Intensive Care Unit (MICU)  2131-03-09 21:33:00   \n",
       "8                              Coronary Care Unit (CCU)  2129-08-04 12:45:00   \n",
       "9      Medical/Surgical Intensive Care Unit (MICU/SICU)  2130-09-24 00:50:00   \n",
       "...                                                 ...                  ...   \n",
       "73163                Medical Intensive Care Unit (MICU)  2178-11-29 21:51:19   \n",
       "73164                Medical Intensive Care Unit (MICU)  2178-11-27 22:53:00   \n",
       "73165  Medical/Surgical Intensive Care Unit (MICU/SICU)  2178-10-01 08:51:00   \n",
       "73166                Medical Intensive Care Unit (MICU)  2185-07-16 18:48:18   \n",
       "73167                Medical Intensive Care Unit (MICU)  2185-07-03 22:45:00   \n",
       "\n",
       "                   outtime       los  \n",
       "1      2189-06-27 20:38:27  0.497535  \n",
       "6      2160-05-19 17:33:33  1.314352  \n",
       "7      2131-03-10 18:09:21  0.858576  \n",
       "8      2129-08-10 17:02:38  6.178912  \n",
       "9      2130-09-27 22:13:41  3.891447  \n",
       "...                    ...       ...  \n",
       "73163  2178-11-29 22:44:27  0.036898  \n",
       "73164  2178-11-29 21:29:39  1.942118  \n",
       "73165  2178-10-03 23:25:08  2.607037  \n",
       "73166  2185-07-26 18:27:01  9.985220  \n",
       "73167  2185-07-12 13:40:34  8.621921  \n",
       "\n",
       "[22903 rows x 8 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "icu_hf_pt = pt_icu[pt_icu[\"subject_id\"].isin(hf_id)]\n",
    "icu_hf_pt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12426"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(icu_hf_pt[\"subject_id\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_lab_id = pd.read_csv('hosp/d_labitems.csv.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_lab_comp = pd.merge(df_lab, df_lab_id, on='itemid', how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "pt_with_pro_bnp = df_lab_comp[(df_lab_comp['itemid'] == 50963)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>labevent_id</th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>specimen_id</th>\n",
       "      <th>itemid</th>\n",
       "      <th>order_provider_id</th>\n",
       "      <th>charttime</th>\n",
       "      <th>storetime</th>\n",
       "      <th>value</th>\n",
       "      <th>valuenum</th>\n",
       "      <th>valueuom</th>\n",
       "      <th>ref_range_lower</th>\n",
       "      <th>ref_range_upper</th>\n",
       "      <th>flag</th>\n",
       "      <th>priority</th>\n",
       "      <th>comments</th>\n",
       "      <th>label</th>\n",
       "      <th>fluid</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5090</th>\n",
       "      <td>5099</td>\n",
       "      <td>10000935</td>\n",
       "      <td>25849114.0</td>\n",
       "      <td>55494709</td>\n",
       "      <td>50963</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2187-10-11 06:25:00</td>\n",
       "      <td>2187-10-11 18:35:00</td>\n",
       "      <td>___</td>\n",
       "      <td>617.0</td>\n",
       "      <td>pg/mL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>226.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>ROUTINE</td>\n",
       "      <td>REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...</td>\n",
       "      <td>NTproBNP</td>\n",
       "      <td>Blood</td>\n",
       "      <td>Chemistry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5917</th>\n",
       "      <td>5927</td>\n",
       "      <td>10000980</td>\n",
       "      <td>NaN</td>\n",
       "      <td>33821244</td>\n",
       "      <td>50963</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2188-01-03 13:18:00</td>\n",
       "      <td>2188-01-03 14:42:00</td>\n",
       "      <td>___</td>\n",
       "      <td>4571.0</td>\n",
       "      <td>pg/mL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>624.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>STAT</td>\n",
       "      <td>REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...</td>\n",
       "      <td>NTproBNP</td>\n",
       "      <td>Blood</td>\n",
       "      <td>Chemistry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6206</th>\n",
       "      <td>6216</td>\n",
       "      <td>10000980</td>\n",
       "      <td>NaN</td>\n",
       "      <td>46753637</td>\n",
       "      <td>50963</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2189-06-27 06:48:00</td>\n",
       "      <td>2189-06-27 07:56:00</td>\n",
       "      <td>___</td>\n",
       "      <td>2826.0</td>\n",
       "      <td>pg/mL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>624.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>STAT</td>\n",
       "      <td>REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...</td>\n",
       "      <td>NTproBNP</td>\n",
       "      <td>Blood</td>\n",
       "      <td>Chemistry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6891</th>\n",
       "      <td>6902</td>\n",
       "      <td>10000980</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88477575</td>\n",
       "      <td>50963</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2190-11-06 16:00:00</td>\n",
       "      <td>2190-11-06 18:12:00</td>\n",
       "      <td>___</td>\n",
       "      <td>4279.0</td>\n",
       "      <td>pg/mL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>624.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>STAT</td>\n",
       "      <td>REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...</td>\n",
       "      <td>NTproBNP</td>\n",
       "      <td>Blood</td>\n",
       "      <td>Chemistry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7582</th>\n",
       "      <td>7593</td>\n",
       "      <td>10000980</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77976228</td>\n",
       "      <td>50963</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2191-05-30 12:40:00</td>\n",
       "      <td>2191-05-30 13:55:00</td>\n",
       "      <td>___</td>\n",
       "      <td>6440.0</td>\n",
       "      <td>pg/mL</td>\n",
       "      <td>0.0</td>\n",
       "      <td>624.0</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>STAT</td>\n",
       "      <td>REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...</td>\n",
       "      <td>NTproBNP</td>\n",
       "      <td>Blood</td>\n",
       "      <td>Chemistry</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      labevent_id  subject_id     hadm_id  specimen_id  itemid  \\\n",
       "5090         5099    10000935  25849114.0     55494709   50963   \n",
       "5917         5927    10000980         NaN     33821244   50963   \n",
       "6206         6216    10000980         NaN     46753637   50963   \n",
       "6891         6902    10000980         NaN     88477575   50963   \n",
       "7582         7593    10000980         NaN     77976228   50963   \n",
       "\n",
       "     order_provider_id            charttime            storetime value  \\\n",
       "5090               NaN  2187-10-11 06:25:00  2187-10-11 18:35:00   ___   \n",
       "5917               NaN  2188-01-03 13:18:00  2188-01-03 14:42:00   ___   \n",
       "6206               NaN  2189-06-27 06:48:00  2189-06-27 07:56:00   ___   \n",
       "6891               NaN  2190-11-06 16:00:00  2190-11-06 18:12:00   ___   \n",
       "7582               NaN  2191-05-30 12:40:00  2191-05-30 13:55:00   ___   \n",
       "\n",
       "      valuenum valueuom  ref_range_lower  ref_range_upper      flag priority  \\\n",
       "5090     617.0    pg/mL              0.0            226.0  abnormal  ROUTINE   \n",
       "5917    4571.0    pg/mL              0.0            624.0  abnormal     STAT   \n",
       "6206    2826.0    pg/mL              0.0            624.0  abnormal     STAT   \n",
       "6891    4279.0    pg/mL              0.0            624.0  abnormal     STAT   \n",
       "7582    6440.0    pg/mL              0.0            624.0  abnormal     STAT   \n",
       "\n",
       "                                               comments     label  fluid  \\\n",
       "5090  REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...  NTproBNP  Blood   \n",
       "5917  REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...  NTproBNP  Blood   \n",
       "6206  REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...  NTproBNP  Blood   \n",
       "6891  REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...  NTproBNP  Blood   \n",
       "7582  REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...  NTproBNP  Blood   \n",
       "\n",
       "       category  \n",
       "5090  Chemistry  \n",
       "5917  Chemistry  \n",
       "6206  Chemistry  \n",
       "6891  Chemistry  \n",
       "7582  Chemistry  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pt_with_pro_bnp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "icu_bnp = pt_icu[pt_icu[\"subject_id\"].isin(pt_with_pro_bnp[\"subject_id\"])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14538"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(icu_bnp[\"subject_id\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8211"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(icu_bnp[(icu_bnp[\"subject_id\"].isin(icu_hf_pt[\"subject_id\"]))][\"subject_id\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6327"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(icu_bnp[~(icu_bnp[\"subject_id\"].isin(icu_hf_pt[\"subject_id\"]))][\"subject_id\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using isin() to create a boolean mask where subject_id is present in pt_with_pro_bnp\n",
    "is_in_pro_bnp = icu_hf_pt[\"subject_id\"].isin(pt_with_pro_bnp[\"subject_id\"])\n",
    "\n",
    "# Using ~ to negate the boolean mask, giving us True where subject_id is NOT in pt_with_pro_bnp\n",
    "not_in_pro_bnp = ~is_in_pro_bnp\n",
    "\n",
    "# Selecting rows from icu_hf_pt where subject_id is NOT in pt_with_pro_bnp\n",
    "hf_pt_without_bnp = icu_hf_pt[not_in_pro_bnp]\n",
    "\n",
    "hf_pt_with_bnp = icu_hf_pt[is_in_pro_bnp]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4215\n",
      "8211\n",
      "12426\n"
     ]
    }
   ],
   "source": [
    "print(len(hf_pt_without_bnp.subject_id.unique()))\n",
    "print(len(hf_pt_with_bnp.subject_id.unique()))\n",
    "print(len(icu_hf_pt.subject_id.unique()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2351\n",
      "2041\n",
      "4215\n",
      "177\n"
     ]
    }
   ],
   "source": [
    "print(len(hf_pt_without_bnp[hf_pt_without_bnp[\"subject_id\"].isin(hfref_id)].subject_id.unique())) \n",
    "print(len(hf_pt_without_bnp[hf_pt_without_bnp[\"subject_id\"].isin(hfpef_id)].subject_id.unique()))\n",
    "print(len(hf_pt_without_bnp[hf_pt_without_bnp[\"subject_id\"].isin(hfpef_id)|hf_pt_without_bnp[\"subject_id\"].isin(hfref_id)].subject_id.unique()))\n",
    "print(len(hf_pt_without_bnp[hf_pt_without_bnp[\"subject_id\"].isin(hfpef_id)&hf_pt_without_bnp[\"subject_id\"].isin(hfref_id)].subject_id.unique()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Returned PT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "20084622    7\n",
       "28682905    6\n",
       "23006176    6\n",
       "25332191    6\n",
       "20173617    6\n",
       "           ..\n",
       "28310229    1\n",
       "23105989    1\n",
       "27051245    1\n",
       "28383961    1\n",
       "21923841    1\n",
       "Name: hadm_id, Length: 20133, dtype: int64"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "icu_hf_pt[\"hadm_id\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the duplicated() method to create a boolean mask of duplicate \"hadm_id\"\n",
    "is_duplicate_hadm_id = icu_hf_pt[\"hadm_id\"].duplicated(keep=False)\n",
    "\n",
    "# Use boolean indexing to filter the DataFrame based on the duplicate condition\n",
    "result_df = icu_hf_pt[is_duplicate_hadm_id]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2085"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(result_df[\"subject_id\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10581"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bnp_without_icd = icu_bnp[~(icu_bnp[\"subject_id\"].isin(icu_hf_pt[\"subject_id\"]))][\"hadm_id\"].duplicated(keep=False)\n",
    "\n",
    "len(bnp_without_icd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
